#Does a teenager’s location in the United States increase ones chances of getting pregnant?

#Do counties with high teenage birthrates neighbor other counties with high teenage birthrates? #Do states level of sexual education impact Teenage Birth Rates? Is there a difference between rural and urban counties? regression - can predict teenage birth rates based on sexedu

Descriptive/Exploratory - without trying to model it spatial autocorrelation between of birthrates, if there are clusters try to make a hypothesis about why, could map it out

is religious beliefs, general school policies - clustering - local Moran I why there are these geospatial difference

#Relevant Definitions Pregnancy rate: refers to the rate of successful pregnancies. Birth Rate: number of live births per thousand of population per year

library(terra) |> suppressMessages()
## Warning: package 'terra' was built under R version 4.2.3
library(tmap)|> suppressMessages()
library(stringr) |> suppressMessages()
## Warning: package 'stringr' was built under R version 4.2.3
library(spdep) # package for spatial dependence
## Warning: package 'spdep' was built under R version 4.2.3
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.2.3
## Loading required package: sf
## Warning: package 'sf' was built under R version 4.2.3
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(sf)|> suppressMessages()
library(dplyr)|> suppressMessages()
## Warning: package 'dplyr' was built under R version 4.2.3
library(tidyverse)|> suppressMessages()
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
library(maps)|> suppressMessages()
## Warning: package 'maps' was built under R version 4.2.3
library(readxl)|> suppressMessages()
## Warning: package 'readxl' was built under R version 4.2.3
library(tigris)
## Warning: package 'tigris' was built under R version 4.2.3
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
## 
## Attaching package: 'tigris'
## The following object is masked from 'package:terra':
## 
##     blocks
library(ggplot2)
library(ggpattern)
## Warning: package 'ggpattern' was built under R version 4.2.3
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 4.2.3
options(tigris_use_cache = TRUE)

#Coding

# Load state and county data
#states_sf <- states(class = "sf")**
#counties_sf <- counties(class = "sf")**

states <- tigris::states(cb = TRUE, class = "sf")  
## Retrieving data for the year 2021
counties <- tigris::counties(cb = TRUE, class = "sf")
## Retrieving data for the year 2022
# cb = TRUE for a low-resolution version for faster processing
# Visualize the map with state and county boundaries
#map <- mapview::mapview(states) + mapview::mapview(counties)**
#print(map)**

all_sf <- bind_rows(
  mutate(states, type = "state"),
  mutate(counties, type = "county")
)
mapview::mapview(all_sf)
# Filter out US territories from dataset
# List of US states
us_states <- c("alabama", "arizona", "arkansas", "california", 
               "colorado", "connecticut", "delaware", "florida", "georgia", 
               "idaho", "illinois", "indiana", "iowa", "kansas", 
               "kentucky", "louisiana", "maine", "maryland", "massachusetts", 
               "michigan", "minnesota", "mississippi", "missouri", "montana", 
               "nebraska", "nevada", "new hampshire", "new jersey", "new mexico", 
               "new york", "north carolina", "north dakota", "ohio", 
               "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina", 
               "south dakota", "tennessee", "texas", "utah", "vermont", 
               "virginia", "washington", "west virginia", "wisconsin", "wyoming")
all_sf <- all_sf %>%
  filter(tolower(STATE_NAME) %in% us_states)
unique(all_sf$STATE_NAME)
##  [1] "Alabama"        "Arizona"        "Arkansas"       "California"    
##  [5] "Colorado"       "Delaware"       "Florida"        "Illinois"      
##  [9] "Georgia"        "Idaho"          "Michigan"       "Indiana"       
## [13] "Iowa"           "New York"       "Kansas"         "Kentucky"      
## [17] "Louisiana"      "Maine"          "Maryland"       "Massachusetts" 
## [21] "Minnesota"      "Mississippi"    "Missouri"       "Montana"       
## [25] "Nebraska"       "New Jersey"     "New Mexico"     "North Carolina"
## [29] "North Dakota"   "Ohio"           "Oklahoma"       "Oregon"        
## [33] "Pennsylvania"   "South Carolina" "South Dakota"   "Tennessee"     
## [37] "Texas"          "West Virginia"  "Utah"           "Vermont"       
## [41] "Virginia"       "Washington"     "Wisconsin"      "Wyoming"       
## [45] "Connecticut"    "Rhode Island"   "Nevada"         "New Hampshire"
#Load birth rate data from NCHS
birthrate <- read_excel("C:/Users/melon/OneDrive/Desktop/Masters/Hertie/Geospatial/NCHS2016-Teen_Birth_Rates.xlsx")

#Load Sex Education data by State from Guttmacher Data Center
sexedu <-read_excel("C:/Users/melon/OneDrive/Desktop/Masters/Hertie/Geospatial/GuttmacherDataCenter-SexEdbyState_treated.xlsx")
# Merge birthrate data with all_sf
all_sf <- all_sf %>%
  left_join(birthrate, by = c("NAME" = "County", "STATE_NAME" = "State"))

# Merge sex education data with all_sf
all_sf <- all_sf %>%
  left_join(sexedu, by = c("STATE_NAME" = "U.S. State"))
#Determine best # of groups to put separate birthrate data into
library(cluster)  # For clustering algorithms
## Warning: package 'cluster' was built under R version 4.2.3
## 
## Attaching package: 'cluster'
## The following object is masked from 'package:maps':
## 
##     votes.repub
print(head(birthrate))
## # A tibble: 6 × 9
##    Year State   County `State FIPS Code` `County FIPS Code` `Combined FIPS Code`
##   <dbl> <chr>   <chr>              <dbl>              <dbl>                <dbl>
## 1  2016 Alabama Autau…                 1                  1                 1001
## 2  2016 Alabama Baldw…                 1                  3                 1003
## 3  2016 Alabama Barbo…                 1                  5                 1005
## 4  2016 Alabama Bibb                   1                  7                 1007
## 5  2016 Alabama Blount                 1                  9                 1009
## 6  2016 Alabama Bullo…                 1                 11                 1011
## # ℹ 3 more variables: `Birth Rate` <dbl>, `Lower Confidence Limit` <dbl>,
## #   `Upper Confidence Limit` <dbl>
# Assuming 'birthrate' is your dataframe and 'Birth_Rate' is the column
data <- birthrate$`Birth Rate` %>% na.omit()  # Remove NA values for clustering
wss <- map_dbl(1:10, function(k) {
  kmeans(data, centers = k, nstart = 25)$tot.withinss
})

# Plotting the within-group sum of squares by number of clusters
qplot(1:10, wss, geom = "line") +
  labs(x = "Number of Clusters", y = "Total Within Sum of Squares", title = "Elbow Method for Optimal k")
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#Clean dataset of NAs

# Need to remove NAs so I can run Spatial Autocorrelations/Moran's Test
all_sf_clean <- all_sf %>%
  filter(!is.na(`Birth Rate`))

##Create State Geometries

# Assuming all_sf_clean is already an sf object
states_geom <- all_sf_clean %>%
  group_by(STATE_NAME) %>%
  summarise(geometry = st_union(geometry)) %>%
  ungroup()

# Check the result
print(states_geom)
## Simple feature collection with 47 features and 1 field
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -124.7631 ymin: 24.5213 xmax: -66.9499 ymax: 49.00249
## Geodetic CRS:  NAD83
## # A tibble: 47 × 2
##    STATE_NAME                                                           geometry
##    <chr>                                                          <GEOMETRY [°]>
##  1 Alabama    MULTIPOLYGON (((-88.05235 30.50559, -88.04504 30.50119, -88.02675…
##  2 Arizona    POLYGON ((-110.0007 36.99798, -110.0009 36.9985, -110.0218 36.998…
##  3 Arkansas   POLYGON ((-94.43084 35.39346, -94.43066 35.39248, -94.43146 35.39…
##  4 California MULTIPOLYGON (((-119.5854 38.71321, -119.5871 38.71435, -119.5877…
##  5 Colorado   POLYGON ((-109.0435 37.48482, -109.0458 37.37499, -109.0458 37.35…
##  6 Delaware   MULTIPOLYGON (((-75.56681 39.50919, -75.56545 39.50941, -75.56144…
##  7 Florida    MULTIPOLYGON (((-84.43628 29.97898, -84.43546 29.98175, -84.43617…
##  8 Georgia    MULTIPOLYGON (((-84.97196 32.3777, -84.97433 32.37458, -84.97727 …
##  9 Idaho      POLYGON ((-117.0398 46.81544, -117.0395 46.73673, -117.0395 46.73…
## 10 Illinois   POLYGON ((-91.51308 40.17854, -91.51196 40.17044, -91.50822 40.15…
## # ℹ 37 more rows

Map birth rate by county

custom_palette <- colorRampPalette(c("blue", "pink", "red"))(100)  

tmap_mode("plot")
## tmap mode set to 'plot'
tm <- tm_shape(all_sf) +
  tm_polygons("Birth Rate", id = "NAME",
              palette = custom_palette, title = "Teen Birth Rate by County",
              style = "quantile", n = 3,
              border.col = "black", border.alpha = 0.5) +
  tm_shape(states_geom) +  # Add the shape for the states
  tm_borders(lwd = 1.5, col = "black") +  # Draw state borders
  tm_layout(
    frame = FALSE,
    main.title = "Average Teen Birth Rate by County",
    font.size = 10,  # Adjust base font size
    outer.margins = c(0, 0.02, 0.02, 0.02),  # Reduce outer margins
    legend.position = c("right", "bottom"),
    legend.bg.color = "white",  # Adding background color to legend for better readability
    legend.bg.alpha = 0.7,  # Semi-transparent background for legend
    legend.text.size = 0.8,  # Adjust legend text size
    legend.title.size = 0.9,  # Adjust legend title size
    tm_title("Teen Birth Rate by County")  # Corrected to be inside tm_layout for tmap v3+
  ) +
  tm_scale_bar(
    position = c("left", "bottom"),
    breaks = c(0, 100, 200, 300, 400, 500)
  )
## tm_polygons: Deprecated tmap v3 code detected. Code translated to v4
## Warning: The 'main.title' argument of 'tm_layout()' is deprecated as of tmap
## 4.0. Please use 'tm_title()' instead.
## As of version 4.0, 'tm_scale_bar()' is deprecated. Please use 'tm_scalebar()' instead.FALSE
print(tm)
## Warning: Some legend items or map compoments do not fit well (e.g. due to the
## specified font size).
## Warning: Some legend items or map compoments do not fit well (e.g. due to the
## specified font size).

#Average Teen Birth Rate by State ##Modify State dataset

# Aggregate birth rate data to the state level by calculating the mean birth rate per state
states <- tigris::states(cb = TRUE, class = "sf") 
## Retrieving data for the year 2021
state_birthrate <- all_sf %>%
  group_by(STATE_NAME) %>%
  summarise(Avg_Birth_Rate = mean(`Birth Rate`, na.rm = TRUE), .groups = 'drop')

state_birthrate <- as.data.frame(state_birthrate)

# Perform the join


states <- left_join(states, sexedu, by = c("NAME" = "U.S. State"))
states <- left_join(states, state_birthrate, by = c("NAME" = "STATE_NAME")) 
states <- states %>%
 filter(!is.na(Avg_Birth_Rate))

##Map

tm_states_birthrate <- tm_shape(states) +
  tm_polygons("Avg_Birth_Rate", id = "NAME",
              palette = colorRampPalette(c("blue", "pink", "red"))(100), 
              title = "Average Teen Birth Rate by State",
              style = "quantile", n = 3,
              border.col = "black", border.alpha = 0.5) +
  tm_layout(
    main.title = "Average Birth Rate by State",
    main.title.position = "center",
    legend.text.size = 0.6,
    legend.title.size = 0.7) +
  tm_scale_bar(
    position = c("left", "bottom"),
    breaks = c(0, 100, 200, 300, 400, 500)
  )
## tm_polygons: Deprecated tmap v3 code detected. Code translated to v4
## Warning: The 'main.title' argument of 'tm_layout()' is deprecated as of tmap
## 4.0. Please use 'tm_title()' instead.
## As of version 4.0, 'tm_scale_bar()' is deprecated. Please use 'tm_scalebar()' instead.FALSE
# Print the map
print(tm_states_birthrate)

##Average Teen Birth Rate by State and Sex Education Level

#Map
tm_map <- tm_shape(states) +
  tm_polygons("SexEdu",
              title = "Sex Education Level",
              palette = colorRampPalette(c("lightblue", "blue", "darkblue","red"))(4),  # Ensure the palette function is called correctly
              id = "NAME") +  # Close parentheses correctly here
  tm_layout(
    main.title = "Sex Education Policy by State",
    main.title.position = "center",
    legend.text.size = 0.6,
    legend.title.size = 0.7)
## tm_polygons: Deprecated tmap v3 code detected. Code translated to v4
## Warning: The 'main.title' argument of 'tm_layout()' is deprecated as of tmap
## 4.0. Please use 'tm_title()' instead.
# Print the map
print(tm_map)

states$SexEdu <- factor(states$SexEdu, levels = c("No Sex Ed", "No Contraception/Abstinence Pregnancy Education","Abstinence", "Abstinence/Contraception"))
states$Rate_Group <- cut(states$Avg_Birth_Rate, breaks = 3, labels = c("Low", "Medium", "High"))
ggplot(data = states) +
  # Layer for color fill based on birth rate (without patterns)
  geom_sf(aes(fill = Rate_Group), color = "white", size = 0.2) +
  # Layer for patterns based on sex education (transparent fill to keep birth rate colors)
  geom_sf_pattern(aes(pattern = SexEdu),
                  fill = NA,  # No fill color, just the pattern
                  pattern_color = "black",
                  pattern_density = 0.1,
                  pattern_spacing = 0.02,
                  color = NA) +  # No border color for patterns
  scale_fill_manual(values = c("Low" = "blue", "Medium" = "pink", "High" = "red"),
                    name = "Average Birth Rate") +
  scale_pattern_manual(values = c("No Sex Ed" = "stripe",
                                  "No Contraception/Abstinence Pregnancy Education" = "crosshatch",
                                  "Abstinence" = "circle",
                                  "Abstinence/Contraception" = "none"),
                       name = "Sex Education Level") +
  labs(title = "Teenage Birth Rates and Sex Ed. Policy by State") +
  theme_minimal() +
  theme(
    legend.position = "right",
    plot.title = element_text(hjust = 0.5),
    legend.text = element_text(size = 8),  # Smaller legend text
    legend.title = element_text(size = 10), # Smaller legend title
    legend.key.size = unit(0.5, "cm"),
    panel.grid.major = element_blank(),  # Remove major gridlines
    panel.grid.minor = element_blank(),  # Remove minor gridlines
    panel.border = element_rect(colour = "black", fill=NA, size=1),
    axis.text.x = element_blank(),  # Remove x axis labels
    axis.text.y = element_blank(),  # Remove y axis labels
    axis.ticks = element_blank()  # Remove axis ticks
  )
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#Plotting Neighbors

#See neighbors
nb<-poly2nb(all_sf_clean,queen=TRUE)
nb
## Neighbour list object:
## Number of regions: 3008 
## Number of nonzero links: 17320 
## Percentage nonzero weights: 0.1914222 
## Average number of links: 5.757979 
## 4 regions with no links:
## 713 1073 1394 1577
## 6 disjoint connected subgraphs

#Computing Spatial Autocorrelation:Global Moran’s I

Neighborhood based on contiguity (row standardized weights)

# Create weights for neighbors w/ style Binary 
nb_clean <- poly2nb(all_sf_clean)

nbw <- nb2listw(nb_clean, style = "W", zero.policy = T)

#hypothesis set to "greater", meaning I expect a positive autocorrelation
gmoran <- moran.test(all_sf_clean$`Birth Rate`, nbw,
                     alternative = "greater")
gmoran
## 
##  Moran I test under randomisation
## 
## data:  all_sf_clean$`Birth Rate`  
## weights: nbw  
## n reduced by no-neighbour observations  
## 
## Moran I statistic standard deviate = 48.27, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.5309940752     -0.0003330003      0.0001211634
moran.plot(all_sf_clean$`Birth Rate`, nbw, labels=F)

##Neighborhood based on contiguity (inverse distance weights)

centroid_coords <- st_centroid(all_sf_clean)
## Warning: st_centroid assumes attributes are constant over geometries

invert distances, such that closer areas have higher values

# compute distance for all spatial neighbour links
dists <- nbdists(nb, centroid_coords)

# Invert distances, handle zero by replacing with NA or a large number
inverted_dists <- lapply(dists, function(x) ifelse(x == 0, NA, 1/x))

# Clean NA and infinite values in inverted distances
inverted_dists <- lapply(inverted_dists, function(x) {
  x[is.na(x) | is.infinite(x)] <- 0  # Replace NA or infinite with 0 or some small number
  return(x)
})


# glist: list of general weights corresponding to neighbors; 
# Use style=B to maintain the weights as set with glist
nbb <- nb2listw(nb, glist = inverted_dists, style = "B", zero.policy = TRUE)
gmoran_inverted <- moran.test(all_sf_clean$`Birth Rate`, nbb,
                     alternative = "greater")
gmoran_inverted
## 
##  Moran I test under randomisation
## 
## data:  all_sf_clean$`Birth Rate`  
## weights: nbb  
## n reduced by no-neighbour observations  
## 
## Moran I statistic standard deviate = 34.835, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.5804905913     -0.0003330003      0.0002780115
moran.plot(all_sf_clean$`Birth Rate`, nbb, labels=F)

##Neighbors within Distance of 75km

#Check to see if coordinates are planar or geographical: Results are geographical
crs<-st_crs(all_sf_clean)
crs
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]
# Neighbors based on distance
# d1: lower distance bound 
# d2: upper distance bound in the metric of the points if planar coordinates, in km if in geographical coordinates
nb75 <- dnearneigh(x = centroid_coords, d1 = 0, d2 = 75)
        
nbw75<- nb2listw(nb75, style = "W", zero.policy = T)

#hypothesis set to "greater", meaning I expect a positive autocorrelation
gmoran75 <- moran.test(all_sf_clean$`Birth Rate`, nbw75,
                     alternative = "greater")
gmoran75
## 
##  Moran I test under randomisation
## 
## data:  all_sf_clean$`Birth Rate`  
## weights: nbw75  
## n reduced by no-neighbour observations  
## 
## Moran I statistic standard deviate = 48.779, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.5053910671     -0.0003459011      0.0001074929

##5 nearest neighbors (row standardised weights)

nb5 <- knn2nb(knearneigh(centroid_coords, k = 5)) 
plot(st_geometry(all_sf_clean), border = "lightgray")
plot.nb(nb5, st_geometry(all_sf_clean), add = TRUE, arrows=T)
## Warning in st_point_on_surface.sfc(coords): st_point_on_surface may not give
## correct results for longitude/latitude data

nbw5<- nb2listw(nb5, style = "W", zero.policy = T)

#hypothesis set to "greater", meaning I expect a positive autocorrelation
gmoran1 <- moran.test(all_sf_clean$`Birth Rate`, nbw5,
                     alternative = "greater")
gmoran1
## 
##  Moran I test under randomisation
## 
## data:  all_sf_clean$`Birth Rate`  
## weights: nbw5    
## 
## Moran I statistic standard deviate = 48.503, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.5399484531     -0.0003325574      0.0001240814
moran.plot(all_sf_clean$`Birth Rate`, nbw5, labels=F)

#Compute local Moran’s with highest Moran’s value (5 nearest neighbors)

lmoran <- localmoran(all_sf_clean$`Birth Rate`, nbw5, alternative = "two.sided")
head(lmoran)
##           Ii          E.Ii      Var.Ii       Z.Ii Pr(z != E(Ii))
## 1 0.41637243 -8.945163e-05 0.053737685 1.79653541     0.07240941
## 2 0.16105573 -1.179860e-05 0.007088511 1.91306837     0.05573930
## 3 0.75734012 -2.618316e-04 0.157267092 1.91039052     0.05608295
## 4 0.29553932 -7.669452e-05 0.046074495 1.37720207     0.16844978
## 5 0.13353838 -4.065801e-05 0.024426321 0.85469129     0.39272207
## 6 0.03793513 -6.536127e-04 0.392433505 0.06159956     0.95088173

#Plot Morans I Clusters

tmap_mode("plot")
## tmap mode set to 'plot'
## tmap mode set to interactive viewing
all_sf_clean$lmI <- lmoran[, "Ii"] # local Moran's I
# p-values corresponding to alternative greater
all_sf_clean$lmp <- lmoran[, "Pr(z != E(Ii))"]
all_sf_clean$lmI_sign <- all_sf_clean$lmI
# Handle NA values explicitly in your subsetting
all_sf_clean$lmI_sign[all_sf_clean$lmp >= 0.05 & !is.na(all_sf_clean$lmp)] <- NA

# Assign 'non-significant' to 'quadr' where p-values are not significant or are NA
all_sf_clean$quadr[all_sf_clean$lmp >= 0.05 & !is.na(all_sf_clean$lmp)] <- "non-significant"


# get quadrant information
all_sf_clean$quadr <- attributes(lmoran)$quadr$mean
levels(all_sf_clean$quadr) <- c(levels(all_sf_clean$quadr), "non-significant")
all_sf_clean[(all_sf_clean$lmp >= 0.05) & !is.na(all_sf_clean$lmp), "quadr"] <- "non-significant"

#plot
tm_shape(all_sf_clean) +
  tm_polygons("quadr", 
              palette = c("blue", "lightpink", "skyblue2", "red", "white"), 
              lwd=0.1, alpha = 0.7) +
tm_shape(states_geom) +
  tm_borders(lwd = 1.5, col = "black") + # Adding state borders
  tm_layout(main.title = "Local Moran's I Clusters by County",
          main.title.size = 0.8,
          legend.title.size = 0.7,
          legend.text.size = 0.6)
## tm_polygons: Deprecated tmap v3 code detected. Code translated to v4
## Warning: The 'main.title' argument of 'tm_layout()' is deprecated as of tmap
## 4.0. Please use 'tm_title()' instead.

#Table of Counties in each group

table(all_sf_clean$quadr)
## 
##         Low-Low        High-Low        Low-High       High-High non-significant 
##             364              21              39             396            2188

#Compute local Moran’s with highest Moran’s value (Inverted Distance)

lmoran <- localmoran(all_sf_clean$`Birth Rate`, nbb, alternative = "two.sided")
head(lmoran)
##            Ii          E.Ii       Var.Ii      Z.Ii Pr(z != E(Ii))
## 1 0.058637457 -1.222702e-05 8.424578e-04 2.0206524     0.04331576
## 2 0.015460107 -1.500238e-06 8.417534e-05 1.6852417     0.09194195
## 3 0.116428945 -4.541539e-05 3.030178e-03 2.1159054     0.03435286
## 4 0.035524414 -1.023337e-05 6.058169e-04 1.4437144     0.14881932
## 5 0.016654504 -4.791399e-06 4.491266e-04 0.7860899     0.43181482
## 6 0.004505557 -2.825977e-05 5.589420e-04 0.1917699     0.84792241

#Plot Morans I Clusters

tmap_mode("plot")
## tmap mode set to 'plot'
## tmap mode set to interactive viewing
all_sf_clean$lmI <- lmoran[, "Ii"] # local Moran's I
# p-values corresponding to alternative greater
all_sf_clean$lmp <- lmoran[, "Pr(z != E(Ii))"]
all_sf_clean$lmI_sign <- all_sf_clean$lmI
# Handle NA values explicitly in your subsetting
all_sf_clean$lmI_sign[all_sf_clean$lmp >= 0.05 & !is.na(all_sf_clean$lmp)] <- NA

# Assign 'non-significant' to 'quadr' where p-values are not significant or are NA
all_sf_clean$quadr[all_sf_clean$lmp >= 0.05 & !is.na(all_sf_clean$lmp)] <- "non-significant"


# get quadrant information
all_sf_clean$quadr <- attributes(lmoran)$quadr$mean
levels(all_sf_clean$quadr) <- c(levels(all_sf_clean$quadr), "non-significant")
all_sf_clean[(all_sf_clean$lmp >= 0.05) & !is.na(all_sf_clean$lmp), "quadr"] <- "non-significant"

#plot
tm_shape(all_sf_clean) +
  tm_polygons("quadr", 
              palette = c("blue", "lightpink", "skyblue2", "red", "white"), 
              lwd=0.1, alpha = 0.7) +
tm_shape(states_geom) +
  tm_borders(lwd = 1.5, col = "black") + # Adding state borders
  tm_layout(main.title = "Local Moran's I Clusters by County",
          main.title.size = 0.8,
          legend.title.size = 0.7,
          legend.text.size = 0.6)
## tm_polygons: Deprecated tmap v3 code detected. Code translated to v4
## Warning: The 'main.title' argument of 'tm_layout()' is deprecated as of tmap
## 4.0. Please use 'tm_title()' instead.

Table of Counties in each group

table(all_sf_clean$quadr)
## 
##         Low-Low        High-Low        Low-High       High-High non-significant 
##             320             301              85             147            2155

##Neighborhood based on contiguity (row standardized weights):

lmoran <- localmoran(all_sf_clean$`Birth Rate`, nbw, alternative = "two.sided")
head(lmoran)
##             Ii          E.Ii      Var.Ii         Z.Ii Pr(z != E(Ii))
## 1  0.444257284 -8.945163e-05 0.044766487  2.100126664      0.0357177
## 2  0.115669325 -1.179860e-05 0.005059849  1.626273997      0.1038914
## 3  0.631426794 -2.618316e-04 0.098193706  2.015864006      0.0438142
## 4  0.252751872 -7.669452e-05 0.032888428  1.394134209      0.1632771
## 5  0.155829267 -4.065801e-05 0.030543072  0.891879072      0.3724578
## 6 -0.005137012 -6.536127e-04 0.280122898 -0.008470968      0.9932412

###Plot Morans I Clusters

## tmap mode set to interactive viewing
all_sf_clean$lmI <- lmoran[, "Ii"] # local Moran's I
# p-values corresponding to alternative greater
all_sf_clean$lmp <- lmoran[, "Pr(z != E(Ii))"]
all_sf_clean$lmI_sign <- all_sf_clean$lmI
# Handle NA values explicitly in your subsetting
all_sf_clean$lmI_sign[all_sf_clean$lmp >= 0.05 & !is.na(all_sf_clean$lmp)] <- NA

# Assign 'non-significant' to 'quadr' where p-values are not significant or are NA
all_sf_clean$quadr[all_sf_clean$lmp >= 0.05 & !is.na(all_sf_clean$lmp)] <- "non-significant"


# get quadrant information
all_sf_clean$quadr <- attributes(lmoran)$quadr$mean
levels(all_sf_clean$quadr) <- c(levels(all_sf_clean$quadr), "non-significant")
all_sf_clean[(all_sf_clean$lmp >= 0.05) & !is.na(all_sf_clean$lmp), "quadr"] <- "non-significant"

#plot
tm_shape(all_sf_clean) +
  tm_polygons("quadr", 
              palette = c("blue", "lightpink", "skyblue2", "red", "white"), 
              lwd=0.1, alpha = 0.7) +
tm_shape(states_geom) +
  tm_borders(lwd = 1.5, col = "black") + # Adding state borders
  tm_layout(main.title = "Local Moran's I Clusters by County",
          main.title.size = 0.8,
          legend.title.size = 0.7,
          legend.text.size = 0.6)
## tm_polygons: Deprecated tmap v3 code detected. Code translated to v4
## Warning: The 'main.title' argument of 'tm_layout()' is deprecated as of tmap
## 4.0. Please use 'tm_title()' instead.

table(all_sf_clean$quadr)
## 
##         Low-Low        High-Low        Low-High       High-High non-significant 
##             373              24              48             430            2133